# frozen_string_literal: true

module Gitlab
  module BackgroundMigration
    # Backfills the `vulnerability_finding_risk_scores.risk_score` column
    class BackfillVulnerabilityFindingRiskScores < BatchedMigrationJob
      operation_name :backfill_vulnerability_finding_risk_scores
      feature_category :vulnerability_management

      # Migration-local model over the `vulnerability_occurrences` table.
      #
      # Declares only what this migration uses: the severity name lookup and the
      # associations that lead from a finding to its identifiers.
      class Finding < ::SecApplicationRecord
        include EachBatch

        self.table_name = 'vulnerability_occurrences'

        # Names for the values read from `severity`; see #severity_name.
        SEVERITY_LEVELS = {
          1 => 'info', 2 => 'unknown', 4 => 'low',
          5 => 'medium', 6 => 'high', 7 => 'critical'
        }.freeze

        # Join rows, keyed by `occurrence_id`.
        has_many :finding_identifiers,
          class_name: 'BackfillVulnerabilityFindingRiskScores::FindingIdentifier',
          foreign_key: 'occurrence_id',
          inverse_of: :finding

        # Every identifier attached to the finding.
        has_many :identifiers,
          class_name: 'BackfillVulnerabilityFindingRiskScores::Identifier',
          through: :finding_identifiers

        # Only the identifiers whose `external_type` is "cve", compared case-insensitively.
        has_many :cve_identifiers, -> { where('LOWER(external_type) = ?', 'cve') },
          class_name: 'BackfillVulnerabilityFindingRiskScores::Identifier',
          source: :identifier,
          through: :finding_identifiers

        # Returns the name mapped to this finding's `severity`, or nil when the
        # value has no entry in SEVERITY_LEVELS.
        def severity_name
          SEVERITY_LEVELS[severity]
        end
      end

      # Migration-local join model over `vulnerability_occurrence_identifiers`,
      # connecting a Finding (through `occurrence_id`) with an Identifier.
      class FindingIdentifier < ::SecApplicationRecord
        self.table_name = 'vulnerability_occurrence_identifiers'

        belongs_to :finding,
          class_name: 'BackfillVulnerabilityFindingRiskScores::Finding',
          inverse_of: :finding_identifiers,
          foreign_key: 'occurrence_id'

        belongs_to :identifier,
          class_name: 'BackfillVulnerabilityFindingRiskScores::Identifier',
          inverse_of: :finding_identifiers
      end

      # Migration-local model over `vulnerability_identifiers`.
      class Identifier < ::SecApplicationRecord
        self.table_name = 'vulnerability_identifiers'

        has_many :finding_identifiers,
          class_name: 'BackfillVulnerabilityFindingRiskScores::FindingIdentifier',
          inverse_of: :identifier

        # Optional link to the enrichment row whose `cve` equals this identifier's `name`.
        belongs_to :cve_enrichment,
          class_name: 'BackfillVulnerabilityFindingRiskScores::CveEnrichment',
          primary_key: :cve,
          foreign_key: :name,
          optional: true
      end

      # Migration-local model over `pm_cve_enrichment`; supplies the `epss_score`
      # and `is_known_exploit` values read when a risk score is calculated.
      #
      # Unlike the sibling models it inherits from ApplicationRecord rather than
      # SecApplicationRecord (presumably because the table lives outside the Sec
      # database; confirm). The superclass is anchored at the root namespace,
      # like the siblings' superclass, so constant lookup cannot resolve to a
      # namespaced ApplicationRecord.
      class CveEnrichment < ::ApplicationRecord
        self.table_name = 'pm_cve_enrichment'

        # Identifiers whose `name` equals this row's `cve`.
        has_many :identifiers, class_name: 'BackfillVulnerabilityFindingRiskScores::Identifier',
          primary_key: :cve, foreign_key: :name
      end

      # Migration-local model over `vulnerability_finding_risk_scores`, the table
      # this migration upserts into.
      class FindingRiskScore < ::SecApplicationRecord
        self.table_name = 'vulnerability_finding_risk_scores'
        # The model treats `finding_id` as its primary key.
        self.primary_key = :finding_id
      end

      # Job entry point: passes each sub-batch yielded by `each_sub_batch` to
      # `backfill_risk_scores`.
      def perform
        each_sub_batch { |relation| backfill_risk_scores(relation) }
      end

      private

      # Computes and stores a risk score for every finding in +sub_batch+.
      #
      # Findings are loaded together with their CVE identifiers, the matching CVE
      # enrichments are fetched with a single lookup, and one row per finding is
      # written with a single upsert keyed on `finding_id`.
      #
      # @param sub_batch [ActiveRecord::Relation] relation whose `id`s select the findings
      # @return [Object, nil] the result of `upsert_all`, or nil when there are no findings
      def backfill_risk_scores(sub_batch)
        findings = Finding.where(id: sub_batch.select(:id)).includes(:cve_identifiers).load

        return if findings.empty?

        # One enrichment lookup for the whole sub-batch instead of one per finding.
        cve_names = findings.flat_map(&:cve_identifiers).filter_map(&:name).uniq
        cve_enrichments = CveEnrichment.where(cve: cve_names).index_by(&:cve)

        # Read the clock once so every row of this upsert carries the same timestamp.
        timestamp = Time.current

        # Every finding produces a row, so nothing is filtered out here.
        risk_scores = findings.map do |finding|
          {
            finding_id: finding.id,
            project_id: finding.project_id,
            risk_score: calculate_risk_score(finding, cve_enrichments),
            updated_at: timestamp
          }
        end

        FindingRiskScore.upsert_all(risk_scores, unique_by: :finding_id)
      end

      # Returns the risk score for +finding+, capped at 1.0.
      #
      # The score is the sum of a severity weight, 0.3 times the EPSS score, a
      # stepped EPSS bonus, and a known-exploit bonus. Intended to mirror
      # Vulnerabilities::RiskScore.from_finding.
      #
      # @param finding [#severity_name, #cve_identifiers] finding to score
      # @param cve_enrichments [Hash] enrichment records keyed by CVE name
      # @return [Float]
      def calculate_risk_score(finding, cve_enrichments)
        severity_weights = {
          'critical' => 0.6,
          'high' => 0.4,
          'medium' => 0.2,
          'unknown' => 0.2,
          'low' => 0.05
        }
        # Any severity missing from the table (including nil) contributes nothing.
        severity_weight = severity_weights.fetch(finding.severity_name, 0)

        # Only the first CVE identifier that has an enrichment record is used.
        matched_identifier = finding.cve_identifiers.find { |cve| cve_enrichments[cve.name] }
        enrichment = cve_enrichments[matched_identifier.name] if matched_identifier

        epss = enrichment&.epss_score || 0
        known_exploit = enrichment&.is_known_exploit ? true : false

        epss_threshold_bonus =
          if epss >= 0.5
            0.2
          elsif epss >= 0.1
            0.1
          else
            0
          end

        exploit_bonus = known_exploit ? 0.3 : 0

        uncapped = severity_weight + (epss * 0.3) + epss_threshold_bonus + exploit_bonus

        [1.0, uncapped].min
      end
    end
  end
end
